Heatmaps

Ubiquitous

experimental_metadata <- read.delim("data/ubiquitous/metadata_rem.txt", sep=",", header=TRUE, stringsAsFactors=FALSE)
annotation = data.frame(Condition = rep(c("Control","OKSM", "Senolytic", "Senolyic_OKSM"),
                                            c(3, 3, 2, 3)))

row.names(annotation) = experimental_metadata$sample_id 
anno_colours = list(Condition = c(Control = "#BF3100", OKSM = "#E9B44C", Senolytic = "#1B998B", Senolyic_OKSM = "#5D576B"))
pheatmap(ubiq_pam_clust[,1:(ncol(ubiq_pam_clust)-1)],
         color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
         fontsize_row = 5.5,
         annotation_col = annotation,
         annotation_colors = anno_colours,
         cluster_rows = FALSE,
         cluster_cols = FALSE)

pam_clust_df %>% 
  dplyr::select(gene_name, Cluster) %>%
  datatable(options = list(scrollX = TRUE), class = "white-space: nowrap")
ubi_c1 <- pam_clust_df[pam_clust_df$Cluster == 1, ] %>%
  dplyr::select(-Cluster)

ubi_c2 <- pam_clust_df[pam_clust_df$Cluster == 2, ] %>%
  dplyr::select(-Cluster)

ubi_c3 <- pam_clust_df[pam_clust_df$Cluster == 3, ] %>%
  dplyr::select(-Cluster)

ubi_c4 <- pam_clust_df[pam_clust_df$Cluster == 4, ] %>%
  dplyr::select(-Cluster)

ubi_c5 <- pam_clust_df[pam_clust_df$Cluster == 5, ] %>%
  dplyr::select(-Cluster)

ubi_c6 <- pam_clust_df[pam_clust_df$Cluster == 6, ] %>%
  dplyr::select(-Cluster)

ubi_c7 <- pam_clust_df[pam_clust_df$Cluster == 7, ] %>%
  dplyr::select(-Cluster)

data.frame(Cluster = c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5", 
                       "Cluster 6", "Cluster 7", "Total"),
                        Number_of_genes = c(nrow(ubi_c1), nrow(ubi_c2), nrow(ubi_c3), nrow(ubi_c4),nrow(ubi_c5), 
                                            nrow(ubi_c6), nrow(ubi_c7),
                                            sum(c(nrow(ubi_c1),nrow(ubi_c2),nrow(ubi_c3),nrow(ubi_c4),nrow(ubi_c5),
                                                  nrow(ubi_c6),nrow(ubi_c7)))))
##     Cluster Number_of_genes
## 1 Cluster 1             201
## 2 Cluster 2             157
## 3 Cluster 3             173
## 4 Cluster 4             100
## 5 Cluster 5             147
## 6 Cluster 6             204
## 7 Cluster 7             300
## 8     Total            1282

iPSC

pheatmap(ipsc_pam_clust[,1:(ncol(ipsc_pam_clust)-1)],
         color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
         fontsize_row = 5.5,
         annotation_col = annotation,
         annotation_colors = anno_colours,
         cluster_rows = FALSE,
         cluster_cols = FALSE)

ipsc_pam_clust_df <- ipsc_pam_clust %>%
  as.data.frame() %>%
  mutate(gene_name = ensembl.genes[rownames(.),]$external_gene_name)
datatable(ipsc_pam_clust_df, options = list(scrollX = TRUE), class = "white-space: nowrap")
ipsc_c1 <- ipsc_pam_clust_df[ipsc_pam_clust_df$Cluster == 1, ] %>%
  dplyr::select(-Cluster)

ipsc_c2 <- ipsc_pam_clust_df[ipsc_pam_clust_df$Cluster == 2, ] %>%
  dplyr::select(-Cluster)

ipsc_c3 <- ipsc_pam_clust_df[ipsc_pam_clust_df$Cluster == 3, ] %>%
  dplyr::select(-Cluster)

ipsc_c4 <- ipsc_pam_clust_df[ipsc_pam_clust_df$Cluster == 4, ] %>%
  dplyr::select(-Cluster)

ipsc_c5 <- ipsc_pam_clust_df[ipsc_pam_clust_df$Cluster == 5, ] %>%
  dplyr::select(-Cluster)

ipsc_c6 <- ipsc_pam_clust_df[ipsc_pam_clust_df$Cluster == 6, ] %>%
  dplyr::select(-Cluster)

ipsc_c7 <- ipsc_pam_clust_df[ipsc_pam_clust_df$Cluster == 7, ] %>%
  dplyr::select(-Cluster)

data.frame(Cluster = c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", 
                       "Cluster 5", "Cluster 6", "Cluster 7", "Total"),
                        Number_of_genes = c(nrow(ipsc_c1), nrow(ipsc_c2), nrow(ipsc_c3), nrow(ipsc_c4),
                                            nrow(ipsc_c5), nrow(ipsc_c6), nrow(ipsc_c7),
                                            sum(c(nrow(ipsc_c1),nrow(ipsc_c2),nrow(ipsc_c3),nrow(ipsc_c4),
                                                  nrow(ipsc_c5),nrow(ipsc_c6),nrow(ipsc_c7)))))
##     Cluster Number_of_genes
## 1 Cluster 1             773
## 2 Cluster 2             397
## 3 Cluster 3             128
## 4 Cluster 4             566
## 5 Cluster 5             613
## 6 Cluster 6             248
## 7 Cluster 7             242
## 8     Total            2967

Gene Expression Boxplots

Ubiquitous

Cluster 1

cluster_boxplot(ubi_c1, "Cluster 1", "TdTom", "Sen_OKSM", "OKSM")

Cluster 2

cluster_boxplot(ubi_c2, "Cluster 2", "TdTom", "Sen_OKSM", "OKSM")

Cluster 3

cluster_boxplot(ubi_c3, "Cluster 3", "TdTom", "Sen_OKSM", "OKSM")

Cluster 4

cluster_boxplot(ubi_c4, "Cluster 4", "TdTom", "Sen_OKSM", "OKSM")

Cluster 5

cluster_boxplot(ubi_c5, "Cluster 5", "TdTom", "Sen_OKSM", "OKSM")

Cluster 6

cluster_boxplot(ubi_c6, "Cluster 6", "TdTom", "Sen_OKSM", "OKSM")

Cluster 7

cluster_boxplot(ubi_c7, "Cluster 7", "TdTom", "Sen_OKSM", "OKSM")

iPSC

Cluster 1

cluster_boxplot(ipsc_c1, "Cluster 1", "Control", "SO", "O")

Cluster 2

cluster_boxplot(ipsc_c2, "Cluster 2", "Control", "SO", "O")

Cluster 3

cluster_boxplot(ipsc_c3, "Cluster 3", "Control", "SO", "O")

Cluster 4

cluster_boxplot(ipsc_c4, "Cluster 4", "Control", "SO", "O")

Cluster 5

cluster_boxplot(ipsc_c5, "Cluster 5", "Control", "SO", "O")

Cluster 6

cluster_boxplot(ipsc_c6, "Cluster 6", "Control", "SO", "O")

Cluster 7

cluster_boxplot(ipsc_c7, "Cluster 7", "Control", "SO", "O")

Gene overlaps

Overall

olaps <- readPNG("output/comparison/overlaps.png")
p <- rasterGrob(olaps, interpolate = TRUE)
ggarrange(p)

I’ve manipulated the heatmaps such that Clusters 1 - 5 in the ubiquitous data is the same as Clusters 1 - 7 in the iPSC dataset for easy comparison

C1 ubiquitous dataset in C1 and/or C2 of iPSC dataset

Percentage of genes in c1 of ubiquitous dataset in c1 & c2 of iPSC dataset

denom <- c(rownames(ipsc_c1),rownames(ipsc_c2))
sum(rownames(ubi_c1) %in% denom)/length(denom)*100
## [1] 5.897436

Where else can we find c1 ubiquitous genes in iPSC dataset?

tab <- ipsc_pam_clust_df[rownames(ubi_c1),] %>%
  drop_na %>%
  group_by(Cluster) %>%
  tally()
tab
## # A tibble: 7 × 2
##   Cluster     n
##     <dbl> <int>
## 1       1    39
## 2       2    30
## 3       3     2
## 4       4     3
## 5       5     6
## 6       6    12
## 7       7     4

96 genes in Cluster 1 of the ubiquitous dataset is accounted for in the iPSC dataset. This implies that 1074 genes in C1 of the ubiquitous dataset is not expressed or significantly differentially expressed in the iPSC dataset.

C3 ubiquitous dataset in C4 and or C5 of iPSC dataset

Percentage of genes in c3 of ubiquitous dataset in c4 & c5 iPSC dataset

denom <- c(rownames(ipsc_c4),rownames(ipsc_c5))
sum(rownames(ubi_c3) %in% denom)/length(denom)*100
## [1] 4.240882

Where else can we find c3 ubiquitous genes in iPSC dataset?

tab <- ipsc_pam_clust_df[rownames(ubi_c3),] %>%
  drop_na %>%
  group_by(Cluster) %>%
  tally()
tab
## # A tibble: 7 × 2
##   Cluster     n
##     <dbl> <int>
## 1       1     8
## 2       2     4
## 3       3     2
## 4       4    31
## 5       5    19
## 6       6     9
## 7       7    14

87 genes in Cluster 3 of the ubiquitous dataset is accounted for in the iPSC dataset. This implies that 1092 genes in C3 of the ubiquitous dataset is not expressed or significantly differentially expressed in the iPSC dataset.

C4 ubiquitous dataset in C6 of iPSC dataset

Percentage of genes in c4 of ubiquitous dataset in c6 iPSC dataset

sum(rownames(ubi_c4) %in% rownames(ipsc_c6))/length(rownames(ipsc_c6))*100
## [1] 7.258065

Where else can we find c4 ubiquitous genes in iPSC dataset?

tab <- ipsc_pam_clust_df[rownames(ubi_c4),] %>%
  drop_na %>%
  group_by(Cluster) %>%
  tally()
tab
## # A tibble: 6 × 2
##   Cluster     n
##     <dbl> <int>
## 1       1    19
## 2       2     7
## 3       3     1
## 4       4     3
## 5       5    10
## 6       6    18

58 genes in Cluster 4 of the ubiquitous dataset is accounted for in the iPSC dataset. This implies that 190 genes in C4 of the ubiquitous dataset is not expressed or significantly differentially expressed in the iPSC dataset.

Summary Enrichments from compareCluster

Ubiquitous

genes = lapply(list(c1=ubi_c1, c2=ubi_c2, c3=ubi_c3, c4=ubi_c4, c5=ubi_c5, c6=ubi_c6, c7=ubi_c7), function(i){ensembl.genes[rownames(i)]$entrezgene_id})
bg = ensembl.genes[rownames(ddssva)]$entrezgene_id
compPathway <- compareCluster(geneCluster   = genes,
                              fun           = "enrichGO",
                              keyType       = "ENTREZID",
                              universe      = bg,
                              ont           = "BP",
                              OrgDb         = "org.Dm.eg.db",
                              pvalueCutoff  = 1,
                              qvalueCutoff  = 0.1,
                              pAdjustMethod = "BH")
dotplot(compPathway, showCategory = 10, title = "GO Enrichment Analysis") + theme(axis.text.y = element_text(size=8))

compPathway <- compareCluster(geneCluster   = genes,
                              fun           = "enrichKEGG",
                              keyType       = "ncbi-geneid",
                              universe      = bg,
                              organism      = "dme",
                              pvalueCutoff  = 1,
                              qvalueCutoff  = 0.2,
                              pAdjustMethod = "BH")
dotplot(compPathway, showCategory = 10, title = "KEGG Enrichment Analysis") + theme(axis.text.y = element_text(size=8))

iPSC

genes = lapply(list(c1=ipsc_c1, c2=ipsc_c2, c3=ipsc_c3, c4=ipsc_c4, c5=ipsc_c5, c6=ipsc_c6, c7=ipsc_c7), function(i){ensembl.genes[rownames(i)]$entrezgene_id})
compPathway <- compareCluster(geneCluster   = genes,
                              fun           = "enrichGO",
                              keyType       = "ENTREZID",
                              ont           = "BP",
                              OrgDb         = "org.Dm.eg.db",
                              pvalueCutoff  = 1,
                              qvalueCutoff  = 0.1,
                              pAdjustMethod = "BH")
dotplot(compPathway, showCategory = 8, title = "GO Enrichment Analysis") + theme(axis.text.y = element_text(size=8))

compPathway <- compareCluster(geneCluster   = genes,
                              fun           = "enrichKEGG",
                              keyType       = "ncbi-geneid",
                              organism      = "dme",
                              pvalueCutoff  = 1,
                              qvalueCutoff  = 0.1,
                              pAdjustMethod = "BH")
dotplot(compPathway, showCategory = 10, title = "KEGG Enrichment Analysis") + theme(axis.text.y = element_text(size=8))

Summary Enrichments from custom script

Ubiquitous

filenames = list.files("output/ubiquitous", pattern = "_go.rds", full.names = TRUE)
all_go = lapply(filenames, function(x){readRDS(x)})
names(all_go) <- c("c1", "c2", "c3", "c4", "c5", "c6", "c7")
ubi_go <- all_go %>%
  bind_rows(.id = "Cluster") %>%
  group_by(Cluster) %>%
  slice_min(order_by = Adjusted.P.value, n = 10)
ubi_go %>%
  mutate(Term = gsub("\\([^()]*\\)", "", Term),
         Term = str_to_title(Term)) %>%
  mutate(Cluster = factor(Cluster, levels = c("c1","c2", "c3", "c4","c5","c6","c7"))) %>%
  group_by(Cluster) %>%
  mutate(Term = factor(Term, levels = Term)) %>%
  ggplot(aes(x = Cluster, y = Term)) +
  geom_point(aes(colour = Adjusted.P.value, size = Ratio)) + 
  ylab(NULL) + 
  xlab("Cluster") +
  scale_color_continuous(low="red", high="blue", guide=guide_colorbar(reverse=FALSE)) + 
  scale_y_discrete(labels = function(x) str_wrap(x, width = 60),
                   limits=rev) +
  #labs(fill = "Adjusted p-value") +
  guides(colour = guide_colorbar(title = "Adjusted p-value", reverse = TRUE),
         size = guide_legend(title = "Gene Ratio")) +
  theme(axis.text.x = element_text(size=8)) +
  theme_bw() +
  ggtitle("GO Enrichment Analysis")

filenames = list.files("output/ubiquitous", pattern = "_kegg.rds", full.names = TRUE)
all_kegg = lapply(filenames, function(x){readRDS(x)})
names(all_kegg) <- c("c1", "c2", "c4", "c5", "c6", "c7")
ubi_kegg <- all_kegg %>%
  bind_rows(.id = "Cluster") %>%
  group_by(Cluster) %>%
  slice_min(order_by = Adjusted.P.value, n = 10)
ubi_kegg %>%
  mutate(Term = gsub("\\([^()]*\\)", "", Term),
         Term = str_to_title(Term)) %>%
  mutate(Cluster = factor(Cluster, levels = c("c1","c2","c4", "c5","c6","c7"))) %>%
  group_by(Cluster) %>%
  mutate(Term = factor(Term, levels = Term)) %>%
  ggplot(aes(x = Cluster, y = Term)) +
  geom_point(aes(colour = Adjusted.P.value, size = Ratio)) + 
  ylab(NULL) + 
  xlab("Cluster") +
  scale_color_continuous(low="red", high="blue", guide=guide_colorbar(reverse=FALSE)) + 
  scale_y_discrete(labels = function(x) str_wrap(x, width = 60),
                   limits=rev) +
  #labs(fill = "Adjusted p-value") +
  guides(colour = guide_colorbar(title = "Adjusted p-value", reverse = TRUE),
         size = guide_legend(title = "Gene Ratio")) +
  theme(axis.text.x = element_text(size=8)) +
  theme_bw() +
  ggtitle("KEGG Enrichment Analysis")

Session Info

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ReactomePA_1.38.0                     clusterProfiler_4.2.0                
##  [3] png_0.1-7                             ggpubr_0.4.0                         
##  [5] enrichR_3.0                           DT_0.19                              
##  [7] VennDiagram_1.7.0                     futile.logger_1.4.3                  
##  [9] RColorBrewer_1.1-2                    org.Dm.eg.db_3.14.0                  
## [11] scales_1.1.1                          reshape2_1.4.4                       
## [13] knitr_1.36                            biomaRt_2.50.0                       
## [15] GenomicFeatures_1.46.1                AnnotationDbi_1.56.2                 
## [17] genefilter_1.76.0                     DESeq2_1.34.0                        
## [19] SummarizedExperiment_1.24.0           Biobase_2.54.0                       
## [21] MatrixGenerics_1.6.0                  matrixStats_0.61.0                   
## [23] BSgenome.Dmelanogaster.UCSC.dm6_1.4.1 BSgenome_1.62.0                      
## [25] rtracklayer_1.54.0                    GenomicRanges_1.46.0                 
## [27] Biostrings_2.62.0                     GenomeInfoDb_1.30.0                  
## [29] XVector_0.34.0                        IRanges_2.28.0                       
## [31] S4Vectors_0.32.2                      BiocGenerics_0.40.0                  
## [33] forcats_0.5.1                         stringr_1.4.0                        
## [35] dplyr_1.0.7                           purrr_0.3.4                          
## [37] readr_2.0.2                           tidyr_1.1.4                          
## [39] tibble_3.1.6                          tidyverse_1.3.1                      
## [41] EnhancedVolcano_1.12.0                ggrepel_0.9.1                        
## [43] ggplot2_3.3.5                         pheatmap_1.0.12                      
## [45] cluster_2.1.2                        
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2               tidyselect_1.1.1         RSQLite_2.2.8           
##   [4] htmlwidgets_1.5.4        BiocParallel_1.28.0      scatterpie_0.1.7        
##   [7] munsell_0.5.0            withr_2.4.2              colorspace_2.0-2        
##  [10] GOSemSim_2.20.0          filelock_1.0.2           highr_0.9               
##  [13] ggalt_0.4.0              rstudioapi_0.13          ggsignif_0.6.3          
##  [16] DOSE_3.20.0              Rttf2pt1_1.3.9           labeling_0.4.2          
##  [19] GenomeInfoDbData_1.2.7   polyclip_1.10-0          farver_2.1.0            
##  [22] bit64_4.0.5              rprojroot_2.0.2          downloader_0.4          
##  [25] treeio_1.18.0            vctrs_0.3.8              generics_0.1.1          
##  [28] lambda.r_1.2.4           xfun_0.29                BiocFileCache_2.2.0     
##  [31] R6_2.5.1                 graphlayouts_0.7.1       ggbeeswarm_0.6.0        
##  [34] locfit_1.5-9.4           gridGraphics_0.5-1       bitops_1.0-7            
##  [37] cachem_1.0.6             fgsea_1.20.0             DelayedArray_0.20.0     
##  [40] assertthat_0.2.1         BiocIO_1.4.0             ggraph_2.0.5            
##  [43] enrichplot_1.14.1        beeswarm_0.4.0           gtable_0.3.0            
##  [46] ash_1.0-15               tidygraph_1.2.0          rlang_0.4.12            
##  [49] splines_4.1.2            lazyeval_0.2.2           rstatix_0.7.0           
##  [52] extrafontdb_1.0          checkmate_2.0.0          broom_0.7.10            
##  [55] yaml_2.2.1               abind_1.4-5              modelr_0.1.8            
##  [58] crosstalk_1.2.0          backports_1.3.0          qvalue_2.26.0           
##  [61] extrafont_0.17           tools_4.1.2              ggplotify_0.1.0         
##  [64] ellipsis_0.3.2           jquerylib_0.1.4          Rcpp_1.0.7              
##  [67] plyr_1.8.6               progress_1.2.2           zlibbioc_1.40.0         
##  [70] RCurl_1.98-1.5           prettyunits_1.1.1        viridis_0.6.2           
##  [73] cowplot_1.1.1            haven_2.4.3              fs_1.5.0                
##  [76] magrittr_2.0.1           data.table_1.14.2        futile.options_1.0.1    
##  [79] DO.db_2.9                reactome.db_1.77.0       reprex_2.0.1            
##  [82] patchwork_1.1.1          hms_1.1.1                evaluate_0.14           
##  [85] xtable_1.8-4             XML_3.99-0.8             readxl_1.3.1            
##  [88] gridExtra_2.3            compiler_4.1.2           maps_3.4.0              
##  [91] shadowtext_0.0.9         KernSmooth_2.23-20       crayon_1.4.2            
##  [94] htmltools_0.5.2          ggfun_0.0.4              tzdb_0.2.0              
##  [97] aplot_0.1.1              geneplotter_1.72.0       lubridate_1.8.0         
## [100] DBI_1.1.1                tweenr_1.0.2             formatR_1.11            
## [103] dbplyr_2.1.1             proj4_1.0-10.1           MASS_7.3-54             
## [106] rappdirs_0.3.3           Matrix_1.3-4             car_3.0-12              
## [109] cli_3.1.0                igraph_1.2.8             parallel_4.1.2          
## [112] pkgconfig_2.0.3          GenomicAlignments_1.30.0 xml2_1.3.2              
## [115] ggtree_3.2.0             annotate_1.72.0          vipor_0.4.5             
## [118] bslib_0.3.1              rvest_1.0.2              yulab.utils_0.0.4       
## [121] digest_0.6.28            graph_1.72.0             rmarkdown_2.11          
## [124] cellranger_1.1.0         fastmatch_1.1-3          tidytree_0.3.5          
## [127] restfulr_0.0.13          curl_4.3.2               graphite_1.40.0         
## [130] Rsamtools_2.10.0         rjson_0.2.20             nlme_3.1-153            
## [133] lifecycle_1.0.1          jsonlite_1.7.2           carData_3.0-4           
## [136] viridisLite_0.4.0        fansi_0.5.0              pillar_1.6.4            
## [139] lattice_0.20-45          ggrastr_1.0.1            KEGGREST_1.34.0         
## [142] fastmap_1.1.0            httr_1.4.2               survival_3.2-13         
## [145] GO.db_3.14.0             glue_1.5.0               bit_4.0.4               
## [148] ggforce_0.3.3            stringi_1.7.5            sass_0.4.0              
## [151] blob_1.2.2               memoise_2.0.0            ape_5.5